******************************************************************************************
* Implement exercises in Ciccarelli and Mojon (RESTAT 2010)
******************************************************************************************
* This code 
* 		prepares 12-month growth rates of aggregated PPI and cost shocks generated
* 		runs regressions of each country on global average, collects R2
*		saves data to be used in dynamic factor analysis (one-factor) 
*		runs static factor analysis using standardized values of the var
*		exports results to Tables.xlsx: sheets Table 1-4
* 		applies two exercises to unit labour cost
* 		exports results to Tables.xlsx: sheets Table 5
******************************************************************************************

******************************************************************************************
* Main settings
******************************************************************************************
* findit univar

set more off
clear all

*global rootfolder ""
global folder "$rootfolder\analysis"
global inputfolder17X31 "$folder\COSTSHOCKS\17X31"
global inputfolder18X31 "$folder\COSTSHOCKS\18X31"
global inputfolder35X31 "$folder\COSTSHOCKS\35X31"
global ulcfolder "$rootfolder\data\5_ULC\output"

* Create output folder if not existing:
capture mkdir "$folder\FactorAnalysis"
global outputfolder "$folder\FactorAnalysis"
cd $outputfolder

capture mkdir "$folder\FactorAnalysis\Otrok_oneFactor"
capture mkdir "$folder\FactorAnalysis\Otrok_oneFactor\data"
global otrokfolder "$folder\FactorAnalysis\Otrok_oneFactor"
capture mkdir "$folder\FactorAnalysis\Otrok_oneFactor\Otrok_output" // for Gauss output

******************************************************************************************
global YEAR_START 1995
global YEAR_END 2011
******************************************************************************************

* Results saved in: Tables.xlsx
* --- Generate sheets in order (matters in Gauss)
foreach sheet in Table_1 Table_2 Table_3 Table_4 Table_5 Table_6 Table_A4 {
	putexcel set Tables.xlsx,sheet("`sheet'") modify
	putexcel A1 = " "
}

******************************************************************************************
* Import aggregated PPI and cost shocks from excel (prepared in matlab)
******************************************************************************************

*******************
* 17 X 31
*******************
import excel using "$inputfolder17X31\\aggregatedCostShocks_allPeriods_table.xlsx",clear firstrow
save "$inputfolder17X31\\aggregatedCostShocks_allPeriods_table.dta", replace

*******************
* 18 X 31
*******************
import excel using "$inputfolder18X31\\aggregatedCostShocks_allPeriods_table_18X31.xlsx",clear firstrow
save "$inputfolder18X31\\aggregatedCostShocks_allPeriods_table_18X31.dta", replace

*******************
* 35 X 31
*******************
import excel using "$inputfolder35X31\\aggregatedCostShocks_allPeriods_table_higherOrder.xlsx",clear firstrow
save "$inputfolder35X31\\aggregatedCostShocks_allPeriods_table_higherOrder.dta", replace

**************************************
* Merge aggregated cost shocks
**************************************
use "$inputfolder17X31\\aggregatedCostShocks_allPeriods_table.dta", clear
merge 1:1 country year month using "$inputfolder18X31\\aggregatedCostShocks_allPeriods_table_18X31.dta"
assert _merge == 3
drop _merge
merge 1:1 country year month using "$inputfolder35X31\\aggregatedCostShocks_allPeriods_table_higherOrder.dta"
assert _merge == 3
drop _merge
drop PPIoo

* Do adjustments: format tsset etc.
encode country, gen(country_num)
gen date = ym(year,month)
format date %tm
*drop year month 
order  country country_num date 
tsset country_num date

rename *_ag *
rename PPI PPI_baseline
rename PPI_balI PPI_bal1
rename PPI_balII PPI_bal2
rename PPI_sagg PPI_sagg_s
rename PPI_scpi PPI_scpi_s
rename COST_mechpt_0333 COST_mechpt_1over3 
rename COST_mechpt_0667 COST_mechpt_2over3
rename COST_pcomp_0333  COST_pcomp_1over3 
rename COST_pcomp_0667  COST_pcomp_2over3
rename COST_sagg  COST_sagg_s
rename COST_scpi  COST_scpi_s

order  country country_num date PPI* COST*
tsset country_num date

******************************************************************************************
* List all the variables to be used in the analyses 
******************************************************************************************
ds, has(type double)
global ppicostlist `r(varlist)'
macro list ppicostlist

******************************************************************************************
* Compute 12-month growth rates
******************************************************************************************
foreach var in $ppicostlist {
	gen `var'_gr12m = (1 + l11.`var')*(1 + l10.`var')*(1 + l9.`var')*(1 + l8.`var')*(1 + l7.`var')*(1 + l6.`var')*(1 + l5.`var')* ///
	(1 + l4.`var')*(1 + l3.`var')*(1 + l2.`var')*(1 + l1.`var')*(1 + `var') - 1
}

* Drop missing values of the first 11 months
summarize year
drop if ym(year,month) < ym(`r(min)',12)

save "aggregatedCostShocks_12mgrowthrate.dta",replace

********************************************************************************
* 1) First exercise in Ciccarelli and Mojon:
* 	 R2's in the regression of individual country on global average
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate.dta",clear

* Initiate matrix R2 to store the results
levelsof country, local(countrylist)
local varnum : list sizeof global(ppicostlist)
local ctynum : list sizeof local(countrylist)
di `ctynum'
di `varnum'

matrix define R2 = J(`ctynum',`varnum',.)
mat colnames R2 = $ppicostlist
mat rownames R2 = `countrylist'

* Regression on ppi & generated cost shocks
local i = 1
foreach c of local countrylist {	
	local j=1
	foreach var of global ppicostlist {
	
		capture drop `var'_global_temp 
		capture drop temp_`var'_temp
		
		egen temp_`var'_temp = mean(`var'_gr12m) if country != "`c'", by(date)
		egen `var'_global_temp = mean(temp_`var'_temp), by(date)
		
		qui: reg `var'_gr12m `var'_global_temp if country == "`c'"
		mat R2[`i',`j']=e(r2)
		
		local j = `j'+1
	}
	local i=`i'+1
} 
mat list R2

* Save to use in the second exercise
save "aggregatedCostShocks_12mgrowthrate_globalavg.dta",replace

* Export matrix R2 to excel
clear
svmat R2, names(col)

gen country = ""
forvalues i=1/`=_N' {
	local c: word `i' of `countrylist'
	replace country = "`c'" in `i'
}
order country

* Main title for tables 1-4
foreach sheet in Table_1 Table_2 Table_3 Table_4{
	putexcel set Tables.xlsx,sheet("`sheet'") modify
	putexcel A1 = "R-squared"
}

* Table 1: r-squared by country: PPI12, C12
local cellstart A
local rowstart = 2 
drop if country == "ROW"
local ctynum = `ctynum' - 1
export excel country PPI_baseline COST_baseline using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_1", modify) firstrow(var)

* Tables 2-4: column headers only
local cellstart B
local rowstart = 2 
preserve
keep if country == "AUS" 
* (will be overwritten. just to write column headers)

export excel PPI_baseline COST_baseline COST_pcomp_2over3 COST_pcomp_1over3 COST_pcomp_ss COST_noFX COST_mechpt_2over3 COST_mechpt_1over3 COST_mechpt_ss using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_2", modify) firstrow(var)
export excel PPI_baseline COST_baseline PPI_bal1 PPI_bal2 PPI_domLin using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_3", modify) firstrow(var)
export excel PPI_baseline COST_baseline PPI_noOil PPIusd COST_ppiusd COST_ho COST_sagg_dropS PPI_scpi_s COST_scpi_s using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_4", modify) firstrow(var)
restore

* Export matrix stats to excel
mat stats = J(4,`varnum',.)
local i = 1
foreach v in $ppicostlist {
	qui: univar `v'
	mat stats[1,`i'] = `=r(mean)'
	mat stats[2,`i'] = `=r(p50)'
	mat stats[3,`i'] = `=r(min)'
	mat stats[4,`i'] = `=r(max)'
	local i = `i'+1
}
clear
svmat stats
gen stat = ""
replace stat = "mean" in 1
replace stat = "median" in 2
replace stat = "min" in 3
replace stat = "max" in 4

* Table 1: summary statistics: PPI12, C12 
local cellstart A
local rowstart_sumstat = 34
export excel stat stats1 stats2 using Tables.xlsx, cell(`cellstart'`rowstart_sumstat') sheet("Table_1", modify)

* Table 2: mean,median for Baseline: PPI12, C12; Pricing complementarity: C12 2/3, C12 1/3, C12 beta_u; No E C12; Mechanical pass-through: C12 2/3, C12 1/3, C12 beta_u
drop if stat == "min"
drop if stat == "max"

local cellstart A
local rowstart = 3
export excel stat stats1 stats11 stats17 stats16 stats18 stats15 stats13 stats12 stats14 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_2", modify)

* Table 3: rows: mean,median; columns R^2: Baseline: PPI12, C12; Symmetric input linkages: PPI12 Balanced 1, Balanced 2; Domestic inut linkages: PPI12
local cellstart A
local rowstart = 3
export excel stat stats1 stats11 stats2 stats3 stats4 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_3", modify)

* Table 4: rows: mean,median; columns R^2: Baseline: PPI12, C12; Symmetric input linkages: PPI12 Balanced 1, Balanced 2; Domestic input linkages: PPI12
local cellstart A
local rowstart = 3
export excel stat stats1 stats11 stats5 stats6 stats19 stats24 stats21 stats9 stats22 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_4", modify)


********************************************************************************
* 2) Second exercise in Ciccarelli and Mojon (2010)
* 	 simple factor model of inflation 
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_globalavg.dta",clear

* Initiate matrix R2 to store the results
levelsof country, local(countrylist)
local varnum : list sizeof global(ppicostlist)
local ctynum : list sizeof local(countrylist)
di `ctynum'
di `varnum'

* Reshape dataset to wide form
keep date country *_gr12m
reshape wide *_gr12m, i(date) j(country) string

* Generate standardized values
foreach var of varlist *_gr12m* {
	egen `var'_std = std(`var')
}
rename *gr12m*_std **_std

* Static factor analysis 
* !  the main output is 1 minus "uniqueness" for each country

* Initiate the matrix to store the results
capture mat drop static_factor_results
mat static_factor_results = J(`ctynum',1,.)
local colnames dum

* Store e(Psi)
foreach var in $ppicostlist {
	factor `var'*_std, factors(1)
	mat M`var' = e(Psi)
	mat static_factor_results = (static_factor_results,M`var'')
	local colnames `colnames' `var'
}
mat colnames static_factor_results = `colnames'

* Save results [1-uniqueness]
clear
svmat static_factor_results, names(col)
drop dum

gen country = ""
forvalues i=1/`=_N' {
	local c: word `i' of `countrylist'
	replace country = "`c'" in `i'
}
order country

* - Compute [1-uniqueness]	
foreach v of global ppicostlist {
	replace `v' = 1- `v'
}

* Main title for tables 1-4
putexcel set Tables.xlsx,sheet("Table_1") modify
putexcel E1 = "Static Factor"
foreach sheet in Table_2 Table_3 Table_4{
	putexcel set Tables.xlsx,sheet("`sheet'") modify
	putexcel A6 = "Static Factor"
}

* Export [1-uniqueness]
local cellstart E
local rowstart = 2
drop if country == "ROW"
export excel country PPI_baseline COST_baseline using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_1", modify) firstrow(var)

* Prepare summary statistics
mat stats = J(4,`varnum',.)
local i = 1
foreach v of global ppicostlist {
	qui: univar `v'
	mat stats[1,`i'] = `=r(mean)'
	mat stats[2,`i'] = `=r(p50)'
	mat stats[3,`i'] = `=r(min)'
	mat stats[4,`i'] = `=r(max)'
	local i = `i'+1
}

* Save stats in excel for 1-uniqueness
clear
svmat stats
gen stat = ""
replace stat = "mean" in 1
replace stat = "median" in 2
replace stat = "min" in 3
replace stat = "max" in 4

* Table 1: summary statistics: PPI12, C12  
local cellstart E
local rowstart_sumstat = 34
export excel stat stats1 stats2 using Tables.xlsx, cell(`cellstart'`rowstart_sumstat') sheet("Table_1", modify)

* Table 2: mean,median for Baseline: PPI12, C12; Pricing complementarity: C12 2/3, C12 1/3, C12 beta_u; No E C12; Mechanical pass-through: C12 2/3, C12 1/3, C12 beta_u
drop if stat == "min"
drop if stat == "max"

local cellstart A
local rowstart = 7
export excel stat stats1 stats11 stats17 stats16 stats18 stats15 stats13 stats12 stats14 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_2", modify)

* Table 3: rows: mean,median; columns R^2: Baseline: PPI12, C12; Symmetric input linkages: PPI12 Balanced 1, Balanced 2; Domestic inut linkages: PPI12
local cellstart A
local rowstart = 7
export excel stat stats1 stats11 stats2 stats3 stats4 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_3", modify)

* Table 4: rows: mean,median; columns R^2: Baseline: PPI12, C12; Symmetric input linkages: PPI12 Balanced 1, Balanced 2; Domestic input linkages: PPI12
local cellstart A
local rowstart = 7
export excel stat stats1 stats11 stats5 stats6 stats19 stats24 stats21 stats9 stats22 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_4", modify)


********************************************************************************
* Save data for dynamic factor analysis (one-factor)
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_globalavg.dta",clear

* Reshape dataset to wide form
keep date country *_gr12m
reshape wide *_gr12m, i(date) j(country) string

* Save variables in wide format in text - to use them in the Otrok model
foreach var in $ppicostlist {
	outsheet `var'* using "$otrokfolder\data\gvc_data_`var'_country.txt", non replace
}

********************************************************************************
* Prepare row-column headers for one-factor dynamic analysis (results will be exported in Gauss)
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_globalavg.dta",clear

keep date country
collapse (firstnm) date, by(country)

keep country
drop if country == "ROW"

set obs `=_N+5'
replace country = "mean" in `=_N-3'
replace country = "median" in `=_N-2'
replace country = "min" in `=_N-1'
replace country = "max" in `=_N'

putexcel set Tables.xlsx,sheet("Table_1") modify
putexcel I1 = "Dynamic Factor"

foreach sheet in Table_2 Table_3 Table_4 {
	putexcel set Tables.xlsx,sheet("`sheet'") modify
	putexcel A10 = "Dynamic Factor"
	putexcel A11 = "mean"
	putexcel A12 = "median"
}

local cellstart I
local rowstart = 2
export excel country using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_1", modify) firstrow(variables) 

********************************************************************************
* Unit labour cost - quarterly frequency
********************************************************************************
use "$inputfolder17X31\\aggregatedCostShocks_allPeriods_table.dta", clear

* Do adjustments: format tsset etc.
encode country, gen(country_num)
gen date = ym(year,month)
format date %tm
*drop year month 
order  country country_num date 
tsset country_num date

rename PPI_ag PPI_baseline
rename COST_baseline_ag COST_baseline

keep country country_num date year month PPI_baseline COST_baseline
order country country_num date PPI* COST*
tsset country_num date

* Compute 12-month growth rates
foreach var in PPI_baseline COST_baseline {
	gen `var'_gr12m = (1 + l11.`var')*(1 + l10.`var')*(1 + l9.`var')*(1 + l8.`var')*(1 + l7.`var')*(1 + l6.`var')*(1 + l5.`var')* ///
	(1 + l4.`var')*(1 + l3.`var')*(1 + l2.`var')*(1 + l1.`var')*(1 + `var') - 1
}

* Drop missing values of the first 11 months
summarize year
drop if ym(year,month) < ym(`r(min)',12)

* convert to a 4 quarter growth rate by collapsing
gen quarter = 1
replace quarter = 2 if month>3
replace quarter = 3 if month>6
replace quarter = 4 if month>9

gen date_q = yq(year, quarter)
format %tq date_q
collapse PPI_baseline_gr12m COST_baseline_gr12m, by(country date_q)

*merge with the unit labor costs
merge 1:1 country date_q using "$ulcfolder\ulc_all_iso3_yoy.dta"
*drop if observations are in using but not master
drop if _merge == 2

*keep only starting 1996
drop if date_q < yq(1996,1)
*keep only the series where _merge is always 3 (that is when ULC and PPI/COST are not missing
bys country: egen avg_merge = mean(_merge)
keep if avg_merge ==3
drop _merge

save "aggregatedCostShocks_12mgrowthrate_qrtly.dta",replace

********************************************************************************
* 1) First exercise in Ciccarelli and Mojon:
* 	 R2's in the regression of individual country on global average
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_qrtly.dta",clear

global varlist PPI_baseline_gr12m COST_baseline_gr12m ulc_yoy
* Initiate matrix R2 to store the results
levelsof country, local(countrylist)
local varnum : list sizeof global(varlist)
local ctynum : list sizeof local(countrylist)
di `ctynum'
di `varnum'

matrix define R2 = J(`ctynum',`varnum',.)
mat colnames R2 = $varlist
mat rownames R2 = `countrylist'

local i = 1
local i = 1
foreach c of local countrylist {	
	local j=1
	foreach var of global varlist {
	
		capture drop `var'_global_temp 
		capture drop temp_`var'_temp
		
		egen temp_`var'_temp = mean(`var') if country != "`c'", by(date)
		egen `var'_global_temp = mean(temp_`var'_temp), by(date)
		
		qui: reg `var' `var'_global_temp if country == "`c'"
		mat R2[`i',`j']=e(r2)
		
		local j = `j'+1
	}
	local i=`i'+1
} 
mat list R2

* Save to use in the second exercise
save "aggregatedCostShocks_12mgrowthrate_qrtly_globalavg.dta",replace

* Matrix R2
clear
svmat R2, names(col)

gen country = ""
forvalues i=1/`=_N' {
	local c: word `i' of `countrylist'
	replace country = "`c'" in `i'
}
order country

* Table 5
putexcel set Tables.xlsx,sheet("Table_5") modify
putexcel A1 = "R-squared"

local cellstart A
local rowstart = 2 
drop if country == "ROW"
local ctynum = `ctynum' - 1

preserve
keep if country == "AUS" 
* (will be overwritten. just to write column headers)
export excel country PPI_baseline COST_baseline ulc_yoy using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_5", modify) firstrow(var)
restore

* Export matrix stats to excel
mat stats = J(4,`varnum',.)
local i = 1
foreach v in $varlist {
	qui: univar `v'
	mat stats[1,`i'] = `=r(mean)'
	mat stats[2,`i'] = `=r(p50)'
	mat stats[3,`i'] = `=r(min)'
	mat stats[4,`i'] = `=r(max)'
	local i = `i'+1
}
clear
svmat stats
gen stat = ""
replace stat = "mean" in 1
replace stat = "median" in 2
replace stat = "min" in 3
replace stat = "max" in 4

* Table 2: mean,median for Baseline: PPI12, C12, ULC
drop if stat == "min"
drop if stat == "max"

local cellstart A
local rowstart = 3
export excel stat stats1 stats2 stats3 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_5", modify)


********************************************************************************
* 2) Second exercise in Ciccarelli and Mojon (2010)
* 	 simple factor model of inflation 
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_qrtly_globalavg.dta", clear

* Initiate matrix R2 to store the results
levelsof country, local(countrylist)
local varnum : list sizeof global(varlist)
local ctynum : list sizeof local(countrylist)
di `ctynum'
di `varnum'

keep country date_q PPI_baseline_gr12m COST_baseline_gr12m ulc_yoy

* Reshape data to wide (one variable = one country)
reshape wide PPI_baseline_gr12m COST_baseline_gr12m ulc_yoy, i(date) j(country) string

* Generate standarized version of the variables
foreach var of varlist PPI_baseline_gr12m* COST_baseline_gr12m* ulc_yoy* {
	egen `var'_std = std(`var')
}

* Static factor analysis 
* !  the main output is 1 minus "uniqueness" for each country

* Initiate the matrix to store the results
capture mat drop static_factor_results
mat static_factor_results = J(`ctynum',1,.)
local colnames dum

* Store e(Psi)
foreach var in $varlist {
	factor `var'*_std, factors(1)
	mat M`var' = e(Psi)
	mat static_factor_results = (static_factor_results,M`var'')
	local colnames `colnames' `var'
}
mat colnames static_factor_results = `colnames'

* Save results [1-uniqueness]
clear
svmat static_factor_results, names(col)
drop dum

gen country = ""
forvalues i=1/`=_N' {
	local c: word `i' of `countrylist'
	replace country = "`c'" in `i'
}
order country

* - Compute [1-uniqueness]	
foreach v of global varlist {
	replace `v' = 1- `v'
}

* Main title for table 5
putexcel set Tables.xlsx,sheet("Table_5") modify
putexcel A6 = "Static Factor"

* Prepare summary statistics
mat stats = J(4,`varnum',.)
local i = 1
foreach v of global varlist {
	qui: univar `v'
	mat stats[1,`i'] = `=r(mean)'
	mat stats[2,`i'] = `=r(p50)'
	mat stats[3,`i'] = `=r(min)'
	mat stats[4,`i'] = `=r(max)'
	local i = `i'+1
}

* Save stats in excel for 1-uniqueness
clear
svmat stats
gen stat = ""
replace stat = "mean" in 1
replace stat = "median" in 2
replace stat = "min" in 3
replace stat = "max" in 4

* Table 5: mean,median for Baseline: PPI12, C12, ULC
drop if stat == "min"
drop if stat == "max"

local cellstart A
local rowstart = 7
export excel stat stats1 stats2 stats3 using Tables.xlsx, cell(`cellstart'`rowstart') sheet("Table_5", modify)

********************************************************************************
* Save data for dynamic factor analysis (one-factor)
********************************************************************************
use "aggregatedCostShocks_12mgrowthrate_qrtly_globalavg.dta",clear

keep country date_q PPI_baseline_gr12m COST_baseline_gr12m ulc_yoy

* Reshape data to wide (one variable = one country)
reshape wide PPI_baseline_gr12m COST_baseline_gr12m ulc_yoy, i(date) j(country) string
sort date

* Save variables in wide format in text - to use them in the Otrok model
foreach var in $varlist {
	outsheet `var'* using "$otrokfolder\data\gvc_data_`var'_qrtly_country.txt", non replace
}

********************************************************************************
* Prepare row-column headers for one-factor dynamic analysis (results will be exported in Gauss)
********************************************************************************
putexcel set Tables.xlsx,sheet("Table_5") modify
putexcel A10 = "Dynamic Factor"
putexcel A11 = "mean"
putexcel A12 = "median"

* Next
cd "$rootfolder\analysis"
